0: Preparation
Defining the input and output files
# Clean the working environment
rm(list = ls())
knitr::opts_chunk$set(echo = TRUE)
# empirical_species <- "German Shepherd"
empirical_dog_breed <- Sys.getenv("empirical_dog_breed")
empirical_species <- paste(toupper(substring(unlist(strsplit(empirical_dog_breed, "_")), 1, 1)),
substring(unlist(strsplit(empirical_dog_breed, "_")), 2),
sep = "", collapse = " ")
# empirical_species <- "Labrador Retriever"
# document_title <- paste(empirical_species,"Pipeline Results - 50 N_e bottleneck scenario")
# document_title <- paste(empirical_species,"55 Gen Breed Formation - SNP chip: Post_bottleneck_generation - Mutations introduced - 50 N_e bottleneck scenario")
# document_title <- paste(empirical_species,"40 Gen Breed Formation - SNP chip: Post_bottleneck_generation - Mutations introduced - 50 N_e bottleneck scenario")
N_e_bottleneck <- as.numeric(Sys.getenv("N_e_bottleneck"))
n_simulated_generations_breed_formation <- as.numeric(Sys.getenv("n_simulated_generations_breed_formation"))
reference_population_for_snp_chip <- Sys.getenv("reference_population_for_snp_chip")
document_title <- paste0(empirical_species," ",n_simulated_generations_breed_formation," Gen Breed Formation - SNP chip: ",reference_population_for_snp_chip," - ",N_e_bottleneck," N_e bottleneck scenario")
document_sub <- Sys.getenv("document_sub_title")
# #
# # MAF_pruning_used <- TRUE
# MAF_pruning_used <- FALSE
#
# N_e <- 340
# # document_sub <- paste("MAF 0.05 used, but only for H_E-computation, N_e=", N_e)
# # document_sub <- paste("MAF 0.01 used, but only for H_E-computation, N_e=", N_e)
#
#
#
#
#
# if (MAF_pruning_used == FALSE) {
# document_sub <- paste("No MAF-based pruning used, N_e =", N_e)
#
#
# } else {
# document_sub <- paste("MAF-based pruning used, N_e =", N_e)
# }
############################################
# Parameters used for displaying the result
############################################
ROH_frequency_decimals <- 1
H_e_values_decimals <- 5
F_ROH_values_decimals <- 3
####################################
# Defining the input files
####################################
Selection_strength_test_dir <- Sys.getenv("Selection_strength_test_dir")
Sweep_test_dir <- Sys.getenv("Sweep_test_dir")
###############
## Empirical ###
###############
### ROH hotspots ###
Empirical_data_ROH_hotspots_dir <- Sys.getenv("Empirical_data_ROH_hotspots_dir")
Empirical_data_autosome_ROH_freq_dir <- Sys.getenv("Empirical_data_autosome_ROH_freq_dir")
### Inbreeding coefficient ###
Empirical_data_F_ROH_dir <- Sys.getenv("Empirical_data_F_ROH_dir")
### Expected Heterozygosity distribution ###
Empirical_data_H_e_dir <- Sys.getenv("Empirical_data_H_e_dir")
###############
## Simulated ###
###############
### Causative variant ###
variant_freq_plots_dir <- Sys.getenv("variant_freq_plots_dir")
variant_position_dir <- Sys.getenv("variant_position_dir")
pruned_replicates_count_dir <- Sys.getenv("pruned_replicates_count_dir")
Selection_causative_variant_windows_dir <- Sys.getenv("Selection_causative_variant_windows_dir")
causative_variant_windows_H_e_dir <- Sys.getenv("causative_variant_windows_H_e_dir")
### ROH hotspots ###
Neutral_model_ROH_hotspots_dir <- Sys.getenv("Neutral_model_ROH_hotspots_dir")
Neutral_model_autosome_ROH_freq_dir <- Sys.getenv("Neutral_model_autosome_ROH_freq_dir")
Selection_model_ROH_hotspots_dir <- Sys.getenv("Selection_model_ROH_hotspots_dir")
Selection_model_autosome_ROH_freq_dir <- Sys.getenv("Selection_model_autosome_ROH_freq_dir")
### Inbreeding coefficient ###
Neutral_model_F_ROH_dir <- Sys.getenv("Neutral_model_F_ROH_dir")
Selection_model_F_ROH_dir <- Sys.getenv("Selection_model_F_ROH_dir")
### Expected Heterozygosity distribution ###
Neutral_model_H_e_dir <- Sys.getenv("Neutral_model_H_e_dir")
Selection_model_H_e_dir <- Sys.getenv("Selection_model_H_e_dir")
histogram_line_sizes <- 3
empirical_data_color <- "darkgreen"
neutral_model_color <- "blue"
selection_model_color <- "purple"
output_dir <- Sys.getenv("Pipeline_results_output_dir")
# Sys.getenv()
# # Set the working directory for notebook chunks
# knitr::opts_knit$set(root.dir = output_dir_sweep_test)
Loading libraries
library(knitr)
## Warning: package 'knitr' was built under R version 4.3.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
# Install and load the scatterplot3d package
# install.packages("scatterplot3d")
library(scatterplot3d)
Causative variant
Variant fixation
Fixation probability
Fixation time
# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
# Find the index of the first row where the allele frequency is 0.9 or higher
fixation_index <- which(data$allele_frequency >= threshold)[1]
# Return the number of rows until fixation is reached
return(fixation_index - 1)
}
Summary - Variant fixation
| s=0.4 |
37.7 |
23.80 |
20 |
29 |
| s=0.5 |
54.1 |
18.05 |
14 |
24 |
| s=0.6 |
37.0 |
15.00 |
11 |
20 |
| s=0.7 |
57.1 |
10.55 |
9 |
13 |
| s=0.8 |
50.0 |
8.75 |
7 |
10 |
Causative variant windows
Variant position
Window lengths
Summary - Causative variant windows
| 1 |
s=0.4 |
2.135 |
91.0 |
48.5 |
100 |
92.6 |
| 2 |
s=0.5 |
2.355 |
86.9 |
43.0 |
100 |
88.6 |
| 4 |
s=0.7 |
2.520 |
85.3 |
37.5 |
100 |
86.7 |
| 3 |
s=0.6 |
2.810 |
91.3 |
47.0 |
100 |
93.2 |
| 5 |
s=0.8 |
3.960 |
88.3 |
42.5 |
100 |
90.2 |
Standard Error Confidence interval function
Confidence level <=> konfidensgrad
# Function to calculate standard error and its confidence interval
standard_error_confidence_interval_fun <- function(observed_data, confidence_level = 0.95) {
# if ( nrow(observed_data) > 1 ) {
if ( length(observed_data) > 1 ) {
# Calculate standard error
n <- length(observed_data)
standard_deviation <- sd(observed_data)
standard_error <- standard_deviation / sqrt(n - 1)
# Calculate confidence interval based on standard error
alpha <- (1 - confidence_level) / 2
margin_of_error <- qnorm(1 - alpha) * standard_error
mean_estimate <- mean(observed_data)
# Calculate the percentiles for the confidence interval
confidence_interval_lower_bound <- mean_estimate - margin_of_error # 2.5th percentile (2σ)
confidence_interval_upper_bound <- mean_estimate + margin_of_error # 97.5th percentile (2σ)
# Return confidence interval
return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
} else {
return(c(NA,NA))
}
}
# # Function to calculate bootstrap confidence intervals
# standard_error_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
#
# # Function to calculate the mean for each bootstrap sample
# compute_bootstrap_mean_fun <- function(observed_data_urn) {
# bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
# bootstrap_estimate <- mean(bootstrap_dataset)
# return(bootstrap_estimate)
# }
#
# # Perform bootstrap resampling
# bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
#
# # Calculate the percentiles for the confidence interval
# alpha <- (1 - confidence_level) / 2
# confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
# confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
#
# # Return the confidence interval
# return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
# }
1: ROH-Frequency
1.1 : Autosome ROH-frequencies
1.1.1 : Empirical - ROH frequency
1.1.2: Neutral Model - ROH frequency
1.1.3: Selection Model
1.1.4 Summary - ROH-hotspot threshold
## ROH-hotspot selection testing results://n
| Neutral |
30.0 |
| Empirical |
55.4 |
| s=0.5 |
87.9 |
| s=0.7 |
88.1 |
| s=0.8 |
90.9 |
| s=0.4 |
91.7 |
| s=0.6 |
92.5 |
1.2 ROH-hotspots - ROH Frequency and Length
2: Inbreeding coefficient
2.1 Empirical data

## Overall Average Avg_F_ROH for labrador_retriever : 0.2333818 //n
2.2 Neutral Model

## Point estimate F_ROH across all 20 simulations: 0.1549667 //n
## [1] "Bootstrap 95% Confidence Interval: [0.150300118218385, 0.159633346781615]"
2.3 Selection Model

## Point estimate F_ROH across all 20 simulations for selection_model_s04_chr3 : 0.2243443 //n[1] "Bootstrap 95% Confidence Interval: [0.210686975401988, 0.238001624598012]"

## Point estimate F_ROH across all 20 simulations for selection_model_s05_chr3 : 0.2382117 //n[1] "Bootstrap 95% Confidence Interval: [0.226464284699583, 0.249959090300417]"

## Point estimate F_ROH across all 20 simulations for selection_model_s06_chr3 : 0.2640368 //n[1] "Bootstrap 95% Confidence Interval: [0.251290182073292, 0.276783337926708]"

## Point estimate F_ROH across all 20 simulations for selection_model_s07_chr3 : 0.2835071 //n[1] "Bootstrap 95% Confidence Interval: [0.268001015456777, 0.299013099543223]"

## Point estimate F_ROH across all 20 simulations for selection_model_s08_chr3 : 0.3108233 //n[1] "Bootstrap 95% Confidence Interval: [0.283698645307223, 0.337947864692777]"
2.4 F_ROH summary
| Neutral |
0.155 |
0.150 |
0.160 |
| s=0.4 |
0.224 |
0.211 |
0.238 |
| Empirical |
0.233 |
NA |
NA |
| s=0.5 |
0.238 |
0.226 |
0.250 |
| s=0.6 |
0.264 |
0.251 |
0.277 |
| s=0.7 |
0.284 |
0.268 |
0.299 |
| s=0.8 |
0.311 |
0.284 |
0.338 |
2.4.1 Visualizing F_ROH

## Models where empirical F_ROH is within CI:
## [1] "s=0.4" "s=0.5"
##
## Models where empirical F_ROH is outside CI:
## [1] "s=0.6" "s=0.7" "s=0.8" "Neutral"
3: Expected Heterozygosity
3.1 Empirical data
3.2 Neutral Model
3.3 Selection Model
## Uncommented because change of analysis
3.4 Causative Variant Window
## Point estimate H_e across all 20 simulations for s04_chr3 : 0.01636695 //n[1] "Bootstrap 95% Confidence Interval: [0.0066107490795494, 0.026123152352932]"
## Point estimate H_e across all 20 simulations for s05_chr3 : 0.02523939 //n[1] "Bootstrap 95% Confidence Interval: [0.0111772731197686, 0.0393015168291243]"
## Point estimate H_e across all 20 simulations for s06_chr3 : 0.01384748 //n[1] "Bootstrap 95% Confidence Interval: [0.0031801100894705, 0.0245148416582593]"
## Point estimate H_e across all 20 simulations for s07_chr3 : 0.02488424 //n[1] "Bootstrap 95% Confidence Interval: [0.0086930957358842, 0.0410753749129887]"
## Point estimate H_e across all 20 simulations for s08_chr3 : 0.02342858 //n[1] "Bootstrap 95% Confidence Interval: [0.00637030228295072, 0.0404868491226634]"
3.4 Genomewide 5th percentile comparison - Expected Heterozygosity Summary
| s08_chr3 |
0.00577 |
0.00234 |
0.00920 |
| s07_chr3 |
0.00757 |
0.00391 |
0.01123 |
| s06_chr3 |
0.00819 |
0.00549 |
0.01090 |
| s05_chr3 |
0.01643 |
0.01212 |
0.02074 |
| s04_chr3 |
0.01800 |
0.01398 |
0.02203 |
| Neutral |
0.04437 |
0.04218 |
0.04657 |
| Empirical |
0.07423 |
NA |
NA |
4: Summary
4.0 General comparison
4.0.1 ROH-hotspot threshold comparison
##
## ROH-hotspot threshold comparison between the different datasets
| Neutral |
30.0 |
| Empirical |
55.4 |
| s=0.5 |
87.9 |
| s=0.7 |
88.1 |
| s=0.8 |
90.9 |
| s=0.4 |
91.7 |
| s=0.6 |
92.5 |
4.0.2 F_ROH comparison
| Neutral |
0.155 |
0.150 |
0.160 |
| s=0.4 |
0.224 |
0.211 |
0.238 |
| Empirical |
0.233 |
NA |
NA |
| s=0.5 |
0.238 |
0.226 |
0.250 |
| s=0.6 |
0.264 |
0.251 |
0.277 |
| s=0.7 |
0.284 |
0.268 |
0.299 |
| s=0.8 |
0.311 |
0.284 |
0.338 |
4.1: Hotspot comparison
4.1.1: Selection test (Sweep test)
## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the lower confidence interval of the fifth percentile of the neutral model are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0.0421777"
| Hotspot_chr1_window_1 |
No |
0.04424 |
| Hotspot_chr28_window_1 |
No |
0.07244 |
| Hotspot_chr6_window_1 |
No |
0.09092 |
| Hotspot_chr11_window_2 |
No |
0.11301 |
| Hotspot_chr15_window_1 |
No |
0.11500 |
| Hotspot_chr24_window_1 |
No |
0.12110 |
| Hotspot_chr13_window_1 |
No |
0.12529 |
| Hotspot_chr11_window_1 |
No |
0.14980 |
| Hotspot_chr8_window_1 |
No |
0.15637 |
## [1] "ROH-hotspots under selection:"
4.1.2: Selection Strength Test (Sweep test)
Hotspot - Causative Window - Comparison
| Hotspot_chr11_window_2 |
17.000 |
60.0 |
0.11301 |
No |
| s=0.8 |
3.960 |
88.3 |
0.02343 |
Yes |
| s=0.6 |
2.810 |
91.3 |
0.01385 |
Yes |
| s=0.7 |
2.520 |
85.3 |
0.02488 |
Yes |
| s=0.5 |
2.355 |
86.9 |
0.02524 |
Yes |
| s=0.4 |
2.135 |
91.0 |
0.01637 |
Yes |
| Hotspot_chr15_window_1 |
1.700 |
60.9 |
0.11500 |
No |
| Hotspot_chr6_window_1 |
1.400 |
65.6 |
0.09092 |
No |
| Hotspot_chr28_window_1 |
1.400 |
75.0 |
0.07244 |
No |
| Hotspot_chr24_window_1 |
1.300 |
65.3 |
0.12110 |
No |
| Hotspot_chr11_window_1 |
1.200 |
55.9 |
0.14980 |
No |
| Hotspot_chr13_window_1 |
1.200 |
60.1 |
0.12529 |
No |
| Hotspot_chr8_window_1 |
0.300 |
57.4 |
0.15637 |
No |
| Hotspot_chr1_window_1 |
0.200 |
61.9 |
0.04424 |
No |
# plot_title <- paste("Length And Average ROH % Comparison - ROH Hotspots & Causative Windows")
plot_title <- paste("ROH Hotspot(s) & Causative Windows comparison")
# Modify the labels by removing "Hotspot_" prefix and "_window" suffix from hotspot models
Hotspots_and_Causative_windows_comparison_sorted$Label <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
gsub("Hotspot_|_window", "", Hotspots_and_Causative_windows_comparison_sorted$Model),
Hotspots_and_Causative_windows_comparison_sorted$Model
)
# Create a new column for identifying the type
Hotspots_and_Causative_windows_comparison_sorted$Type <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
"Hotspot",
"Selection Coefficient"
)
# # Create the 3D scatter plot
# s3d <- scatterplot3d(
# Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# Hotspots_and_Causative_windows_comparison_sorted$H_e,
# color = ifelse(Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot", "red", "blue"),
# pch = 19, # Solid circle
# xlab = "Lengths",
# ylab = "ROH Frequency",
# zlab = "H_e",
# main = plot_title
# )
#
# # Add labels to the points
# s3d.coords <- s3d$xyz.convert(
# Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# Hotspots_and_Causative_windows_comparison_sorted$H_e
# )
# text(s3d.coords$x, s3d.coords$y,
# labels = Hotspots_and_Causative_windows_comparison_sorted$Label,
# pos = 3, cex = 0.5) # pos=3 means above
# Create the 3D scatter plot
s3d <- scatterplot3d(
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
Hotspots_and_Causative_windows_comparison_sorted$H_e,
color = ifelse(Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot", "red", "blue"),
pch = 19, # Solid circle
xlab = "Avg ROH-frequency (%)",
ylab = "Length (Mb)",
zlab = "H_e",
main = plot_title
)
# Add labels to the points
s3d.coords <- s3d$xyz.convert(
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
Hotspots_and_Causative_windows_comparison_sorted$H_e
)
text(s3d.coords$x, s3d.coords$y,
labels = Hotspots_and_Causative_windows_comparison_sorted$Label,
pos = 3, cex = 0.5) # pos=3 means above

Visualizing and scaling
# Min-max-scaling normalization function, that normalizes a vector of values
min_max_normalization_fun <- function(x) {
return((x - min(x)) / (max(x) - min(x)))
}
### Min max scaling ###
# Normalize Lengths_Mb
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb)
# Normalize ROH Frequency
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq)
# Normalize H_e
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$H_e)
# Modify the labels by removing "Hotspot_" prefix and "_window" suffix from hotspot models
Hotspots_and_Causative_windows_comparison_sorted$Label <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
gsub("Hotspot_|_window", "", Hotspots_and_Causative_windows_comparison_sorted$Model),
Hotspots_and_Causative_windows_comparison_sorted$Model
)
# Create a new column for identifying the type
Hotspots_and_Causative_windows_comparison_sorted$Type <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
"Hotspot",
"Selection Coefficient"
)
plot_title <- paste("Normalized ROH Hotspot(s) & Causative Windows comparison")
# Create the 3D scatter plot
s3d <- scatterplot3d(
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized,
color = ifelse(Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot", "red", "blue"),
pch = 19, # Solid circle
xlab = "Normalized Lengths",
ylab = "Normalized Mean ROH Frequency",
zlab = "Normalized H_e",
main = plot_title
)
# Add labels to the points
s3d.coords <- s3d$xyz.convert(
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized
)
text(s3d.coords$x, s3d.coords$y,
labels = Hotspots_and_Causative_windows_comparison_sorted$Label,
pos = 3, cex = 0.5) # pos=3 means above


5 Visualizing Expected Heterozygoisty
5.1 Bootstrap CI for mean 5th percentile H_e
## FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr1_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr1_window_1 :
## [1] "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr28_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr28_window_1 :
## [1] "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr6_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr6_window_1 :
## [1] "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr11_window_2 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr11_window_2 :
## [1] "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr15_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr15_window_1 :
## [1] "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr24_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr24_window_1 :
## [1] "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr13_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr13_window_1 :
## [1] "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr11_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr11_window_1 :
## [1] "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr8_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr8_window_1 :
## [1] "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
5.2 5th percentile of the extreme H_e values